package us.codecraft.webmagic.lianjia.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.lianjia.model.Area;
import us.codecraft.webmagic.lianjia.model.Lists;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.Date;
import java.util.List;

public class GetCategoryUrlProcessor implements PageProcessor {


    private Site site;

    private String domain;

    private String ulsSelectable;

    private String nameSelectable;
    private String urlSelectable;

    private String type;

    public GetCategoryUrlProcessor(String domainUrl, String ulsSelectable, String nameSelectable, String urlSelectable, String type) {
        this.domain = domainUrl;
        this.ulsSelectable = ulsSelectable;
        this.nameSelectable = nameSelectable;
        this.urlSelectable = urlSelectable;
        this.type = type;
        this.site = Site
                .me()
                .setDomain(domainUrl)
                .setSleepTime(3000)
                .setUserAgent(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
    }

    public void process(Page page) {
        List<Selectable> urls = page.getHtml().$(ulsSelectable).nodes();
        for (Selectable ul : urls) {
            String url = domain + ul.xpath(urlSelectable).toString();
            String name = ul.xpath(nameSelectable).toString();
            new Lists()
                    .set("name", name)
                    .set("url", url)
                    .set("type", this.type)
                    .set("update_time", new Date())
                    .save();
        }
    }

    public Site getSite() {
        return site;
    }

    public static void main(String[] args) {
        String domain = "https://sh.lianjia.com";
        //获取一级分类
//        String url = "https://sh.lianjia.com/ershoufang/";
//        String ulsSelectable = "div.position > dl:nth-child(2) > dd > div:nth-child(1) a";
//        String nameSelectable = "//a/@title";
//        String urlSelectable = "//a/@href";
//        String type = "area";

        //获取二级分类
//        String url = "https://sh.lianjia.com/ershoufang/pudong/";
//        String ulsSelectable = "div.position > dl:nth-child(2) > dd > div:nth-child(1) > div:nth-child(2) a";
//        String nameSelectable = "//a/text()";
//        String urlSelectable = "//a/@href";
//        String type = "towns";


        //获取分页数据
        //获取二级分类
        String url = "https://sh.lianjia.com/ershoufang/biyun/";
        String ulsSelectable = "div.leftContent > div.contentBottom.clear > div.page-box.fr > div";
        String nameSelectable = "//div/@page-data";
        String urlSelectable = "//div/@page-url";
        String type = "page";


        Spider.create(new GetCategoryUrlProcessor(domain, ulsSelectable, nameSelectable, urlSelectable, type)).addUrl(url)
                .addPipeline(new ConsolePipeline()).run();
    }
}
